import pandas as pd
import numpy as np

# 1 - Column labels for the dataset, listed in the order they are handed to
#     pandas.read_csv through its `names` argument.
column_names = [
    'Sample code number',
    'Clump Thickness',
    'Uniformity of Cell Size',
    'Uniformity of Cell Shape',
    'Marginal Adhesion',
    'Single Epithelial Cell Size',
    'Bare Nuclei',
    'Bland Chromatin',
    'Normal Nucleoli',
    'Mitoses',
    'Class',
]

# 2 - Read the dataset from the internet with pandas.read_csv, labelling the
#     columns with `column_names`.
#     Missing entries are written as '?' in the file. Declaring that marker
#     through `na_values` lets pandas turn them into NaN while parsing, so the
#     columns keep numeric dtypes instead of the whole frame being cast to
#     `object` just to compare against the string '?'.
#     NOTE: a column that contained '?' is parsed as float, because NaN cannot
#     be stored in an integer column.
data = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data',
    names=column_names,
    na_values='?',
)
print(data.shape)

# 3 - Drop every row that has a missing value in at least one column.
data = data.dropna(how='any')

# 4 - Show the shape of the cleaned data.
print(data.shape)